import requests
from urllib.parse import urlparse
from urllib.parse import urljoin
# Fetch one page of a file listing from the chinatax.gov.cn endpoint and
# collect, for each returned item, its metadata plus a URL whose path is
# re-rooted on the fgk.chinatax.gov.cn host.

# Request headers: present a desktop-browser User-Agent to the server.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
# Listing endpoint (queried via POST) and the base that item paths are joined onto.
url = 'https://www.chinatax.gov.cn/getFileListByCodeId'
base_url1 = 'https://fgk.chinatax.gov.cn/'
# Form fields sent with the request.
data = {'channelId': '2cb303fdee614232b79552d52bb057d6', 'page': '1', 'size': '10'}

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.post(url, headers=headers, data=data, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
r.raise_for_status()

# The item list lives at results -> data -> results in the JSON body; any
# missing or null level is treated as "no items" rather than crashing on None.
payload = r.json()
rslist = ((payload.get("results") or {}).get("data") or {}).get("results") or []

datalist = []
for rs in rslist:
    # Keep only the path component of the item's URL and join it onto
    # base_url1, so the resulting link points at the fgk host.
    url1 = urlparse(rs.get("url") or "")
    d = {
        "title": rs.get("title"),
        "url": rs.get("url"),
        "path": urljoin(base_url1, url1.path),
        "publishedTimeStr": rs.get("publishedTimeStr"),
        "channelName": rs.get("channelName"),
        "subTitleHtml": rs.get("subTitleHtml"),
    }
    datalist.append(d)

# An empty listing would otherwise surface as a bare IndexError below.
if not datalist:
    raise SystemExit("No entries returned by " + url)

# Show the rewritten URL of the first item.
d1 = datalist[0]
print(d1["path"])
# r = requests.get(d1["path"],headers=headers)
# print(r.text)
    
# for d in datalist:
#     r = requests.get(d["path"],headers=headers)
#     print(r.text)
    
#https://fgk.chinatax.gov.cn/zcfgk/c102416/c5239243/content.html
#http://www.chinatax.gov.cn/zcfgk/c102416/c5239243/content.html
#urljoin('https://www.baidu.com', 'FAQ.html')